#' Prepare the Beatles songs dataset for clustering
#'
#' Reads "The Beatles songs dataset, v3.csv" from the working directory,
#' prints a short overview of it, passes it through `fix.the.dataset()`
#' (expected to be defined by "Get rid of NAs.R"), keeps the variables used
#' for K-Means, sets the song titles as row names and saves the result with
#' `saveRDS()` to "The Beatles songs dataset (numeric), v4.1.RData".
#'
#' @return The prepared data frame (row names = song titles), invisibly.
prepareTheBeatlesDatasetForClustering <- function() {

  # Read the dataset:
  the.beatles.songs <- read.csv("The Beatles songs dataset, v3.csv",
                                stringsAsFactors = FALSE)

  # Examine the structure of the dataset.
  # str() prints by itself; summary() only returns a value, and values are
  # not auto-printed inside a function, so it has to be printed explicitly.
  str(the.beatles.songs)
  print(summary(the.beatles.songs))

  # Check if there are NAs (prints the indices of the incomplete rows):
  print(which(!complete.cases(the.beatles.songs)))

  # Fix the dataset for analysis.
  # NOTE: source() evaluates the file in the global environment by default,
  # which is where fix.the.dataset() is then looked up.
  source("Get rid of NAs.R")
  the.beatles.songs <- fix.the.dataset(the.beatles.songs)

  # Focus on the variables used for K-Means (Title becomes the row names):
  the.beatles.songs.num <-
    the.beatles.songs[, c("Duration", "Other.releases",
                          "Covered.by", "Top.50.Billboard")]

  # Row names must be unique, but the v3 dataset contains two recordings
  # titled "I'm Talking About You" (rows 137 and 138 of the raw file).
  # Find them by title instead of by hard-coded position, so the fix still
  # hits the right rows if fix.the.dataset() changes the row order or count.
  song.titles <- the.beatles.songs$Title
  duplicated.title <- "I'm Talking About You"
  duplicated.rows <- which(song.titles == duplicated.title)
  if (length(duplicated.rows) == 2L) {
    song.titles[duplicated.rows] <-
      paste(duplicated.title, c("(Star Club)", "(BBC)"))
  }
  # Safety net: disambiguate any other repeated titles ("X", "X.1", ...).
  song.titles <- make.unique(song.titles)
  rownames(the.beatles.songs.num) <- song.titles

  # Save this version of the dataset for future reuse
  # (restore it in the next session with readRDS(file = "<filename>")):
  saveRDS(object = the.beatles.songs.num,
          file = "The Beatles songs dataset (numeric), v4.1.RData")

  invisible(the.beatles.songs.num)
}
